########## R EXAMPLES ######### # Chapter 3 # 1 install.packages("causalweight") # install causalweight package library(causalweight) # load causalweight package data(JC) # load JC data ?JC # call documentation for JC data D=JC$assignment # define treatment (assignment to JC) Y=JC$earny4 # define outcome (earnings in fourth year) mean(Y[D==1])-mean(Y[D==0]) # compute the ATE # 2 library(causalweight) # load causalweight package library(lmtest) # load lmtest package library(sandwich) # load sandwich package data(JC) # load JC data D=JC$assignment # define treatment (assignment to JC) Y=JC$earny4 # define outcome (earnings in fourth year) ols=lm(Y~D) # run OLS regression coeftest(ols, vcov=vcovHC) # output with heteroscedasticity-robust se # 3 bs=function(data, indices) { # defines function bs for bootstrapping dat=data[indices,] # creates bootstrap sample according to indices coefficients=lm(dat)$coef # estimates coefficients in bootstrap sample return(coefficients) # returns coefficients } # closes the function bs library(boot) # load boot package bootdata=data.frame(Y,D) # data frame with Y,D for bootstrap procedure set.seed(1) # set seed results = boot(data=bootdata, statistic=bs, R=1999) # 1999 bootstrap estimations results # displays the results tstat=results$t0[2]/sd(results$t[,2]) # compute the t-statistic 2*pnorm(-abs(tstat)) # compute the p-value # 4 library(causalweight) # load causalweight package library(lmtest) # load lmtest package library(sandwich) # load sandwich package data(wexpect) # load wexpect data ?wexpect # call documentation for wexpect data D1=wexpect$treatmentinformation # define first treatment (wage information) D2=wexpect$treatmentorder # define second treatment (order of questions) Y=wexpect$wexpect2 # define outcome (wage expectations) ols=lm(Y~D1+D2) # run OLS regression coeftest(ols, vcov=vcovHC) # output with heteroscedasticity robust se # 5 library(datarium) # load datarium package library(np) # load np package data(marketing) # load marketing data ?marketing # call documentation for marketing data D=marketing$newspaper # define treatment (newspaper advertising) Y=marketing$sales # define outcome (sales) results=npregbw(Y~D) # kernel regression plot(results, plot.errors.method="asymptotic") # plot regression function plot(results, gradients=TRUE, plot.errors.method="asymptotic") # plot effects # 6 library(causalweight) # load causalweight package library(lmtest) # load lmtest package library(sandwich) # load sandwich package data(coffeeleaflet) # load coffeeleaflet data attach(coffeeleaflet) # store all variables in own objects ?coffeeleaflet # call documentation for coffeeleaflet data D=treatment # define treatment (leaflet) Y=awarewaste # define outcome (aware of waste production) X=cbind(mumedu,sex) # define covariates (grade, gender, age) ols=lm(Y~D+X) # run OLS regression coeftest(ols, vcov=vcovHC) # output with heteroscedasticity robust se # Chapter 4 # 7 library(Matching) # load Matching package library(Jmisc) # load Jmisc package library(lmtest) # load lmtest package library(sandwich) # load sandwich package data(lalonde) # load lalonde data attach(lalonde) # store all variables in own objects ?lalonde # call documentation for lalonde data D=treat # define treatment (training) Y=re78 # define outcome X=cbind(age,educ,nodegr,married,black,hisp,re74,re75,u74,u75) # covariates DXdemeaned=D*demean(X) # interaction of D and demeaned X ols=lm(Y~D+X+DXdemeaned) # run OLS regression coeftest(ols, vcov=vcovHC) # output # 8 library(Matching) # load Matching package data(lalonde) # load lalonde data attach(lalonde) # store all variables in own objects D=treat # define treatment (training) Y=re78 # define outcome X=cbind(age,educ,nodegr,married,black,hisp,re74,re75,u74,u75) # covariates pairmatching=Match(Y=Y, Tr=D, X=X) # pair matching summary(pairmatching) # matching output # 9 matching=Match(Y=Y, Tr=D, X=X, M=3, BiasAdjust = TRUE) # 1:M matching summary(matching) # matching output # 10 library(Matching) # load Matching package data(lalonde) # load lalonde data attach(lalonde) # store all variables in own objects D=treat # define treatment (training) Y=re78 # define outcome X=cbind(age,educ,nodegr,married,black,hisp,re74,re75,u74,u75) # covariates ps=glm(D~X,family=binomial)$fitted # estimate the propensity score by logit psmatching=Match(Y=Y, Tr=D, X=ps, BiasAdjust = TRUE) # propensity score matching summary(psmatching) # matching output # 11 library(Matching) # load Matching package library(boot) # load boot package data(lalonde) # load lalonde data attach(lalonde) # store all variables in own objects D=treat # define treatment (training) Y=re78 # define outcome X=cbind(age,educ,nodegr,married,black,hisp,re74,re75,u74,u75) # covariates bs=function(data, indices) { # defines function bs for bootstrapping dat=data[indices,] # bootstrap sample according to indices ps=glm(dat[,2:ncol(dat)],data=dat,family=binomial)$fitted # propensity score effect=Match(Y=dat[,1], Tr=dat[,2], X=ps, BiasAdjust = TRUE)$est # ATET return(effect) # returns the estimated ATET } # closes the function bs bootdata=data.frame(Y,D,X) # data frame for bootstrap procedure set.seed(1) # set seed results = boot(data=bootdata, statistic=bs, R=999) # 999 bootstrap estimations results # displays the results tstat=results$t0/sd(results$t) # compute the t-statistic 2*pnorm(-abs(tstat)) # compute the p-value # 12 library(causalweight) # load causalweight package library(COUNT) # load COUNT package data(lbw) # load lbw data attach(lbw) # store all variables in own objects D=smoke # define treatment (mother smoking) Y=bwt # outcome (birthweight in grams) X=cbind(race==1, age, lwt, ptl, ht, ui, ftv) # covariates set.seed(1) # set seed ipw=treatweight(y=Y,d=D,x=X, boot=999) # run IPW with 999 bootstraps ipw$effect # show ATE ipw$se # show standard error ipw$pval # show p-value # 13 library(CBPS) # load CBPS package library(lmtest) # load lmtest package library(sandwich) # load sandwich package cbps=CBPS(D~X, ATT = 0) # covariate balancing for ATE estimation results=lm(Y~D, weights=cbps$weights) # weighted regression coeftest(results, vcov = vcovHC) # show results # 14 library(drgee) # load drgee package library(COUNT) # load COUNT package data(lbw) # load lbw data attach(lbw) # store all variables in own objects D=smoke # define treatment (mother smoking) Y=bwt # outcome (birthweight in grams) X=cbind(race==1, age, lwt, ptl, ht, ui, ftv) # covariates dr=drgee(oformula=formula(Y~X), eformula=formula(D~X), elink="logit") # DR reg summary(dr) # show results # 15 library(COUNT) # load COUNT package library(kdensity) # load kdensity package data(lbw) # load lbw data attach(lbw) # store all variables in own objects D=smoke # define treatment (mother smoking) Y=bwt # outcome (birthweight in grams) X=cbind(race==1, age, lwt, ptl, ht, ui, ftv) # covariates ps=glm(D~X,family=binomial)$fitted # estimate the propensity score by logit psdens1=kdensity(ps[D==1]) # density of propensity score among treated psdens0=kdensity(ps[D==0]) # density of propensity score among non-treated par(mfrow=c(2,2)) # specify a figure with four graphs (2X2) plot(psdens1) # plot density for treated plot(psdens0) # plot density for non-treated hist(ps[D==1]) # plot histogram of p-score for treated hist(ps[D==0]) # plot histogram of p-score for non-treated summary(ps[D==1]) # summary statistics for p-scores among treated summary(ps[D==0]) # summary statistics p-scores among non-treated # 16 library(MatchIt) # load MatchIt package output=matchit(D~X) # pair matching (ATET) on propensity score plot(output,type="hist") # plot common support before/after matching summary(output,standardize=TRUE) # 17 library(Matching) # load Matching package output1=Match(Y=Y, Tr=D, X=ps) # pair matching (ATET) on p-score MatchBalance(D~ptl, match.out=output1) # covariate balance before/after matching output2=Match(Y=Y, Tr=D, X=ps, CommonSupport=TRUE) # pair matching (ATET) MatchBalance(D~lwt, match.out=output2) # covariate balance before/after matching summary(output1) # ATET without common support summary(output2) # ATET with common support # 18 library(causalweight) # load causalweight package set.seed(1) # set seed to 1 ipw=treatweight(y=ptl,d=D,x=X, boot=999)# run IPW with 999 bootstraps ipw$effect # show mean difference in X ipw$pval # show p-value # 19 set.seed(1) # set seed to 1 ipw=treatweight(y=ptl,d=D,x=X, trim=0.1, boot=999)# run IPW with 999 bootstraps ipw$effect # show mean difference in X ipw$pval # show p-value ipw$ntrimmed # number of trimmed units # 20 library(causalweight) # load causalweight package library(devtools) # load devtools package install_github("ehkennedy/npcausal") # install npcausal package library(npcausal) # load npcausal package data(games) # load games data games_nomis=na.omit(games) # drop observations with missings attach(games_nomis) # attach data X=cbind(year,userscore, genre=="Action") # define covariates D=metascore # define treatment Y=sales # define outcome results=ctseff(y=Y, a=D, x=X, bw.seq=seq(from=1,to=5,by=0.5)) # DR estimation plot.ctseff(results) # potential outcome-treatment relation # 21 library(qte) # load qte package D=metascore>75 # define binary treatment (score>75) dat=data.frame(Y,D,X) # create data frame QTE=ci.qte(Y~D, x=X, data=dat) # estimate QTE across different ranks ggqte(QTE) # plot QTEs across ranks (tau) # 22 library(causalweight) # load causalweight package data(JC) # load JC data X0=JC[,2:29] # define pre-treatment covariates X0 X1=JC[,30:36] # define post-treatment covariates X1 D1=JC[,37] # define treatment (training) in first year D1 D2=JC[,38] # define treatment (training) in second year D2 Y2=JC[,44] # define outcome (earnings in fourth year) Y2 set.seed(1) # set seed output=dyntreatDML(y2=Y2,d1=D1,d2=D2,x0=X0,x1=X1) # doubly robust estimation output$effect; output$se; output$pval # effect, standard error, p-value # 23 output=dyntreatDML(y2=Y2,d1=D1,d2=D2,x0=X0,x1=X1, d2treat=0) # estimation output$effect; output$se; output$pval # effect, standard error, p-value # 24 library(causalweight) # load causalweight package data(wexpect) # load wexpect data attach(wexpect) # attach data X=cbind(age,swiss,motherhighedu,fatherhighedu) # define covariates D=male # define treatment M=cbind(business,econ,communi,businform) # define mediator Y=wexpect2 # define outcome medDML(y=Y, d=D, m=M, x=X) # estimate causal mechanisms # Chapter 5 # 25 library(causalweight) # load causalweight package data(JC) # load JC data X=JC[,2:29] # define covariates D=JC[,37] # define treatment (training) in first year Y=JC[,46] # define outcome (health state after 4 years) output=treatDML(y=Y, d=D, x=X) # double machine learning output$effect; output$se; output$pval # effect, standard error, p-value # 26 output=treatDML(y=Y,d=D,x=X,MLmethod="randomforest") # double machine learning output$effect; output$se; output$pval # effect, standard error, p-value # 27 library(grf) # load grf package library(causalweight) # load causalweight package data(JC) # load JC data X=JC[,2:29] # define covariates D=JC[,37] # define treatment (training) in first year Y=JC[,40] # outcome (proportion employed in third year) set.seed(1) # set seed cf=causal_forest(X=X, Y=Y, W=D) # run causal forest ATE=average_treatment_effect(cf) # compute ATE pval=2*pnorm(-abs(ATE[1]/ATE[2])) # compute the p-value ATE; pval # provide ATE, standard error, and p-value # 28 CATE=cf$predictions # store CATEs in own variable hist(CATE) # distribution of CATEs # 29 library(lmtest) # load lmtest package library(sandwich) # load sandwich package highCATE=CATE>median(CATE) # dummy for high CATE ols=lm(JC$age~highCATE) # regress CATEs on gender coeftest(ols, vcov=vcovHC) # output # 30 best_linear_projection(forest=cf,A=JC$female) # regression of function on gender # 31 library(randomForest) # load randomForest package dat=data.frame(CATE,X) # define data frame randomf=randomForest(CATE~. ,data=dat) # predict CATE as a function of X importance(randomf) # show predictive importance of X # 32 library(Matching) # load Matching package library(policytree) # load policytree package library(DiagrammeR) # load DiagrammeR package data(lalonde) # load lalonde data attach(lalonde) # store all variables in own objects D=factor(treat) # define treatment (training) Y=re78 # define outcome X=cbind(age,educ,nodegr,married,black,hisp,re74,re75,u74,u75) # covariates forest=multi_arm_causal_forest(X=X, Y=Y, W=D) #estimate treatment+outcome models influence=double_robust_scores(forest) # obtain efficient influence functions Xpol=cbind(age,educ,nodegr) # relevant X for optimal policy tree=policy_tree(X=Xpol, Gamma=influence, depth=2) # policies for 4 subgroups plot(tree) # plot the tree with optimal policies # Chapter 6 # 33 library(causalweight) # load causalweight package data(JC) # load JC data Z=JC$assignment # define instrument (assignment to JC) D=JC$trainy1 # define treatment (training in 1st year) Y=JC$earny4 # define outcome (earnings in fourth year) ITT=mean(Y[Z==1])-mean(Y[Z==0]) # estimate intention-to-treat effect (ITT) first=mean(D[Z==1])-mean(D[Z==0]) # estimate first stage effect (complier share) LATE=ITT/first # compute LATE ITT; first; LATE # show ITT, first stage effect, and LATE # 34 library(AER) # load AER package LATE=ivreg(Y~D|Z) # run two stage least squares regression summary(LATE,vcov = vcovHC) # results with heteroscedasticity-robust se # 35 library(LARF) # load LARF package library(causalweight) # load causalweight package data(c401k) # load 401(k) pension data D=c401k[,3] # treatment: participation in pension plan Z=c401k[,4] # instrument: eligibility for pension plan Y=c401k[,2] # outcome: net financial assets in 1000 USD X=as.matrix(c401k[,5:11]) # covariates set.seed(1) # set seed LATE=lateweight(y=Y, d=D, z=Z, x=X, boot=299) # compute LATE (299 bootstraps) LATE$effect; LATE$se.effect; LATE$pval.effect # show LATE results LATE$first; LATE$se.first; LATE$pval.first # show first stage results # 36 library(npcausal) # load npcausal package set.seed(1) # set seed ivlate(y=Y, a=D, z=Z, x=X) # estimate LATE by double machine learning # 37 library(localIV) # load localIV package data(toydata) # load toydata D=toydata$d # define binary treatment Z=toydata$z # define continuous instrument Y=toydata$y # define outcome X=toydata$x # define covariate MTE=mte(selection=D~X+Z, outcome=Y~X) # LIV estimation of MTE MTEs=mte_at(u=seq(0.05, 0.95, 0.01), model=MTE) # predict MTEs at mean of X plot(x=MTEs$u,y=MTEs$value,xlab="p(Z, mean X)",ylab="MTE at mean X") #plot # Chapter 7 # 38 library(wooldridge) # load wooldridge package library(multiwayvcov) # load multiwayvcov package library(lmtest) # load lmtest package data(kielmc) # load kielmc data attach(kielmc) # attach data Y=rprice # define outcome D=nearinc # define treatment group T=y81 # define period dummy interact=D*T # treatment-period interaction did=lm(Y~D+T+interact) # DiD regression vcovCL=cluster.vcov(model=did, cluster=cbd) # cluster: distance to center (cbd) coeftest(did, vcov=vcovCL) # DiD results with cluster st.error # 39 library(causalweight) # load causalweight package X=cbind(area, rooms, baths) # define covariates set.seed(1) # set seed to 1 out=didweight(y=Y,d=D,t=T,x=X,boot=399,cluster=cbd) # DiD with cluster se out$effect; out$se; out$pvalue # effect, se, and p-value # 40 library(did) # load did package data(mpdta) # load mpdta data out=att_gt(yname="lemp", tname="year", gname="first.treat", idname="countyreal", xformla=~lpop, clustervars="countyreal", data=mpdta) # doubly robust did summary(out) # group-time-specific ATETs ggdid(out) # plot DiD results meanATET=aggte(out) # generate averages over ATETs summary(meanATET) # report averaged ATETs # 41 library(qte) # load qte package library(wooldridge) # load wooldridge package data(kielmc) # load kielmc data cic=CiC(rprice~nearinc,t=1981,tmin1=1978,tname="year",data=kielmc) # run CiC ggqte(cic) # plot QTETs # Chapter 8 # 42 library(devtools) # load devtools package install_github("synth-inference/synthdid") # install synthdid package library(synthdid) # load synthdid package data(california_prop99) # load smoking data dat=panel.matrices(california_prop99) # prepare data set.seed(1) # set seed out=synthdid_estimate(Y=dat$Y, N0=dat$N0, T0=dat$T0) # synthetic DiD se = sqrt(vcov(out, method='placebo')) # placebo standard error out[1]; se # show results # 43 set.seed(1) # set seed out=synthdid_estimate(Y=dat$Y, N0=dat$N0,T0=dat$T0, omega.intercept=FALSE, weights=list(lambda=rep(0,dat$T0))) # synthetic control se = sqrt(vcov(out, method='placebo')) # placebo standard error out[1]; se # show results plot(out) # plot effects over time # Chapter 9 # 44 library(rdrobust) # load rdrobust library data(rdrobust_RDsenate) # data on elections for US Senate Y=rdrobust_RDsenate$vote # outcome is vote share of Democrats R=rdrobust_RDsenate$margin # running variable is margin of winning results=rdrobust(y=Y, x=R) # sharp RDD summary(results) # show results rdplot(y=Y, x=R) # plot outcome against running variable # 45 library(rdd) # load rdd library DCdensity(runvar=R) # run the McCrary (2008) sorting test # 46 library(devtools) # load devtools package install_github("kolesarm/RDHonest") # install RDHonest package library(RDHonest) # load RDHonest package data(rcp) # load rcp data Y=rcp$cn # outcome is expenditures on non-durables R=rcp$elig_year # running var based on eligibility to retire D=rcp$retired # treatment is retirement status results=rdrobust(y=Y, x=R, fuzzy=D) # fuzzy RDD summary(results) # show results # 47 library(haven) # load haven package data=read_dta("C:/finaldata.dta") # load data Y=data$pers_total # define outcome (total personnel) R=data$forcing # define running variable D=data$costequalgrants # define treatment (grants) results=rdrobust(y=Y, x=R, fuzzy=D, deriv=1, p=1) # run fuzzy RKD summary(results) # show results # 48 library(bunching) # load bunching package data(bunching_data) # load bunching data Y=bunching_data$kink_vector # define outcome (with bunching at value 10000) set.seed(1) # set seed b=bunchit(z_vector=Y,zstar=10000,binwidth=50,bins_l=20,bins_r=20,t0=0,t1=.2)#est b$B; b$B_sd; b$plot # show results # Chapter 10 # 49 library(experiment) # load experiment package library(causalweight) # load causalweight package data(JC) # load JC data treat=JC$assignment # random treatment (assignment to JC) outcome=JC$earny4 # define outcome (earnings in 4. year) selection=JC$pworky4>0 # sample selection: employed in 4. year outcome[selection==0]=NA # recode non-selected outcomes as NA dat=data.frame(treat,selection,outcome) # generate data frame results=ATEbounds(outcome~factor(treat),data=dat) # compute worst case bounds results$bounds; results$bonf.ci # bounds on ATE + confidence intervals # 50 library(devtools) # load devtools package install_github("vsemenova/leebounds") # install leebounds package library(leebounds) # load leebounds package results=leebounds(dat) # bounds (monotonic selection in treat) results$lower_bound; results$upper_bound # bounds on ATE under monotonicity # 51 library(rbounds) # load rbounds package library(Matching) # load Matching package data(lalonde) # load lalonde data attach(lalonde) # store all variables in own objects D=treat # define treatment (training) Y=re78 # define outcome X=cbind(age,educ,nodegr,married,black,hisp,re74,re75,u74,u75) # covariates set.seed(1) # set seed output=Match(Y=Y, Tr=D, X=X, replace=FALSE)# pair matching (ATET),no replacement treatedY=Y[output$index.treated] # treated outcomes nontreatedY=Y[output$index.control] # nontreated outcomes hlsens(treatedY, nontreatedY, Gamma=2, GammaInc = 0.25) # sensitivity analysis # Chapter 11 # 52 library(devtools) # load devtools package install_github("szonszein/interference") # install interference package library(interference) # load interference package data=read.csv("C:/india.csv") # load data data=na.omit(data) # drop observations with missings group=data$village_id # cluster id group_tr=data$mech # indicator high treatment proportion indiv_tr=data$treat # individual treatment (insurance) obs_outcome=data$EXPhosp_1 # outcome (hospital expenditure) dat=data.frame(group,group_tr,indiv_tr,obs_outcome) # generate data frame estimates_hierarchical(dat) # run estimation